'''
 @Author: 王谦璘
 @Date: 2023/04/14/9:17
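 @Description: Flag rows of an Excel sheet whose mileage ("公里标") falls outside an IQR-based range and save the labelled result.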
'''
# -*- coding: utf8 -*-
import sys
import pandas as pd
import numpy as np
def validation_mileage(data, k):
    """Flag rows whose mileage lies outside an IQR-based valid range.

    The valid range is ``[Q1 - k * IQR, Q3 + k * IQR]``, where Q1 and Q3 are
    the 25th and 75th percentiles of the "公里标" (mileage) column and
    ``IQR = Q3 - Q1``.

    Args:
        data: pandas DataFrame containing a "公里标" column.
        k: Coefficient multiplied with the IQR to widen the valid range.

    Returns:
        The same DataFrame object (modified in place) with an added column
        "是否有效（1：有效，0：无效）" holding 1 for rows inside the range and 0 for
        rows outside it.
    """
    label_column = "是否有效（1：有效，0：无效）"

    # Use a positional array: indexing the Series with ``mileage[i]`` is
    # label-based and breaks whenever the DataFrame index is not 0..n-1.
    mileage = data.loc[:, "公里标"].to_numpy()

    # Nothing to rank on an empty frame; still add the (empty) label column.
    if mileage.size == 0:
        data[label_column] = np.empty(0, dtype=np.int64)
        return data

    q1 = np.percentile(mileage, 25)  # lower quartile
    q3 = np.percentile(mileage, 75)  # upper quartile
    iqr = q3 - q1  # interquartile range
    lower_bound = q1 - k * iqr
    upper_bound = q3 + k * iqr

    out_of_range = (mileage < lower_bound) | (mileage > upper_bound)
    data[label_column] = np.where(out_of_range, 0, 1).astype(np.int64)
    return data

if __name__ == "__main__":
    # Command-line entry point: read an Excel file, label each row as
    # valid/invalid by mileage, and write "<input name>_filtered.xlsx"
    # next to the input file.
    import os.path

    if len(sys.argv) < 2:
        sys.exit("usage: python <script> <path-to-excel-file>")

    filepath = sys.argv[1]
    # Example input:
    # "D:/数据/供电数据/01-石长线/石长线-上行-捞刀河站-石门县南站-2020-06-09.xlsx"
    data = pd.read_excel(filepath)
    data_new = validation_mileage(data, 0.75)

    # Strip only the final extension; splitting on the first "." would
    # truncate paths such as "./data/a.xlsx" or "D:/v1.2/a.xlsx".
    savepath = os.path.splitext(filepath)[0] + "_filtered"
    data_new.to_excel(savepath + ".xlsx")

    # NOTE(review): presumably a success marker read by a calling process
    # — confirm before changing.
    print("1")
